Physics-informed Neural Networks (PINN)
Table of Contents
Why do data-driven ‘black-box’ methods fail?
Physics-Informed Neural Networks (PINNs)
The Universal Approximation Theorem
Neural Algorithm for Solving Differential Equations
ANN for ODE and PDE
NN as a universal function approximator
Given
Adding constraints for regularization
This is a result first due to Lagaris et al. from 1998. The idea is to solve differential equations using neural networks by representing the solution by a neural network and training the resulting network to satisfy the conditions required by the differential equation.
Consider a system of ordinary differential equations
$$ {u^\prime} = f(u,t) $$with $t \in [0, 1]$ and a known initial condition $u(0) = u_0$.
To solve this, we approximate the solution by a neural network:
$$ {\text{NN}(t)} \approx {u(t)} $$If $\text{NN}(t)$ was the true solution, then it would hold that $\text{NN}'(t) = f\left(\text{NN}(t),t \right)$ for all $t$. Thus we turn this condition into our loss function.
$${L(\omega)} = \sum_{i} \left(\frac{d \text{NN}(t_i)}{dt}-f\left(\text{NN}(t_i),t_i \right)\right)^2$$
The choice of $t_i$ could be done in many ways: it can be random, it can be a grid, etc. Anyways, when this loss function is minimized (gradients computed with standard reverse-mode automatic differentiation), then we have that $\frac{dNN(t_i)}{dt} \approx f(\text{NN}(t_i),t_i)$ and thus $\text{NN}(t)$ approximately solves the differential equation.
Note that we still have to handle the initial condition. One simple way to do this is to add an initial condition term to the cost function.
$${L(\omega)} = \sum_{i} \left(\frac{d \text{NN}(t_i)}{dt}-f(\text{NN}(t_i),t_i)\right)^2 + (\text{NN}(0)-u_0)^2 $$
where $\omega$ are the parameters that define the neural network $\text{NN}$ that approximates $u$. Thus this reduces down, once again, to simply finding weights which minimize a loss function !
$$\frac{du}{dt} = \cos2\pi t$$
$$u(0) = 1$$
$$u(t) = \frac{1}{2\pi}\sin2\pi t + 1$$
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import sys
# !pip install tensorflow-probability
def NNODE():
    """Build a small fully-connected Keras model approximating u(t).

    Architecture: scalar input t -> Dense(32, tanh) -> Dense(1) output.
    """
    t_in = tf.keras.layers.Input((1,))
    hidden = tf.keras.layers.Dense(32, activation = 'tanh')(t_in)
    u_out = tf.keras.layers.Dense(1)(hidden)
    return tf.keras.models.Model(inputs = t_in, outputs = u_out)
# Instantiate the PINN and the Adam optimizer used to train it.
NN = NNODE()
NN.summary()
optm = tf.keras.optimizers.Adam(learning_rate = 0.001)
def ode_system(x, net):
    """Physics-informed loss for u' = cos(2*pi*t) with u(0) = 1.

    Parameters
    ----------
    x : array-like
        Collocation points in t; accepts shape (n,) or (n, 1).
    net : tf.keras.Model
        Network approximating u(t).

    Returns
    -------
    Scalar tensor: mean of squared ODE residual plus squared
    initial-condition residual over the collocation points.
    """
    # Reshape to a column vector so 1-D inputs (e.g. np.linspace output)
    # match the model's Input((1,)) signature.
    x = np.reshape(x, (-1, 1))
    x_size = x.shape[0]
    x = tf.constant(x, dtype=tf.float32)
    x_0 = tf.zeros((x_size, 1))       # t = 0 points for the initial condition
    one = tf.ones((x_size, 1))        # target u(0) = 1
    with tf.GradientTape() as g:
        g.watch(x)
        # Use the model passed in — the original body called the global NN
        # here, silently ignoring the `net` argument.
        y = net(x)
    y_x = g.gradient(y, x)            # dNN/dt via autodiff
    ode_loss = y_x - tf.math.cos(2*np.pi*x)
    bc_loss = net(x_0) - one
    loss_square = tf.square(ode_loss) + tf.square(bc_loss)
    total_loss = tf.reduce_mean(loss_square)
    return total_loss
# Training configuration and collocation points.
n_itr = 5000   # total optimization iterations
n_prt = 500    # print the loss every n_prt iterations
hist = {}
hist['loss'] = []
np.random.seed(2022)  # reproducible collocation sampling
# 30 random training points and 100 uniform test points on t in [0, 2].
t_train = (np.random.rand(30) * 2).reshape(-1, 1)
t_test = np.linspace(0, 2, 100)
train_loss_record = []
test_loss_record = []
def train_step():
    """Run one Adam step on the PINN loss and record train/test losses.

    Side effects: appends to train_loss_record, test_loss_record and
    hist['loss']; updates NN's weights via the global optimizer `optm`.
    """
    with tf.GradientTape() as tape:
        train_loss = ode_system(t_train, NN)
    # Evaluate the test loss OUTSIDE the tape: no gradient is taken
    # through it, so recording its ops would only waste time and memory.
    test_loss = ode_system(t_test, NN)
    test_loss_record.append(test_loss)
    train_loss_record.append(train_loss)
    gradients = tape.gradient(train_loss, NN.trainable_variables)
    optm.apply_gradients(zip(gradients, NN.trainable_variables))
    hist['loss'].append(train_loss.numpy())
def train():
    """Run n_itr+1 training steps, printing the loss every n_prt steps."""
    for step in range(n_itr + 1):
        train_step()
        sys.stdout.write('\r Training...({0}/{1})'.format(step, n_itr))
        if step % n_prt == 0:
            print(', Loss: {}'.format(hist['loss'][-1]))
train()
# Training/test loss curves on a log scale.
plt.figure()
plt.semilogy(train_loss_record, label="Train loss")
plt.semilogy(test_loss_record, label="Test loss")
plt.legend()
plt.show()
# Compare the PINN prediction against the exact solution
# u(t) = sin(2*pi*t)/(2*pi) + 1 and against the training points.
t_true = np.linspace(0, 2, 100)
result = NN.predict(t_true).ravel()
g_true = np.sin(2*np.pi*t_true)/(2*np.pi) + 1
g = np.sin(2*np.pi*t_train)/(2*np.pi) + 1
plt.figure()
plt.plot(t_train, g, 'ok', label = 'Train')
plt.plot(t_true, g_true, '-k',label = 'True')
plt.plot(t_true, result, '--r', label = 'Prediction')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# !pip install deepxde
# Select the TensorFlow backend for deepxde before building any models.
from deepxde.backend.set_default_backend import set_default_backend
set_default_backend("tensorflow")
$$\frac{du}{dt} = \cos2\pi t$$
$$u(0) = 1$$
$$u(t) = \frac{1}{2\pi}\sin2\pi t + 1$$
import tensorflow as tf
import deepxde as dde
import numpy as np
import matplotlib.pyplot as plt
import math as m
# pi as a TF constant so it can be used inside the compiled residual graph.
pi = tf.constant(m.pi)
def ode_system(t, u):
    """Residual of du/dt = cos(2*pi*t) for deepxde (zero at the solution)."""
    du_t = dde.grad.jacobian(u, t)
    residual = du_t - tf.math.cos(2*pi*t)
    return residual
def boundary(x, on_initial):
    """Select the initial point t = 0 for the IC.

    Respect the on_initial flag deepxde passes instead of ignoring it —
    otherwise any interior collocation point that happened to sit at
    t = 0 would also be treated as an initial-condition point.
    """
    return on_initial and np.isclose(x[0], 0)
# 1-D time domain t in [0, 2] with initial condition u(0) = 1.
geom = dde.geometry.TimeDomain(0, 2)
ic = dde.IC(geom, lambda x: 1, boundary)
# Reference solution to compute the error
def func(t):
    """Exact solution u(t) = sin(2*pi*t)/(2*pi) + 1 of the ODE above."""
    two_pi = 2 * np.pi
    return 1 + np.sin(two_pi * t) / two_pi
data = dde.data.PDE(geom,
                    ode_system,
                    ic,
                    num_domain = 28,    # collocation points inside the domain
                    num_boundary = 2,   # points on the domain boundary
                    solution = func,    # reference solution for error metrics
                    num_test = 100)
print('Total training points: ', data.train_x_all.shape)
print('Training initial points: ', data.train_x_bc.shape)
# Visualize where the training points fall on the time axis.
plt.plot(data.train_x_all, np.zeros([data.train_x_all.shape[0], 1]), '.')
plt.xlabel('Time (t)')
plt.show()
# Fully-connected net: 1 input (t) -> 3 hidden layers of 50 -> 1 output (u).
layer_size = [1] + [50] * 3 + [1]
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.FNN(layer_size, activation, initializer)
model = dde.Model(data, net)
model.compile("adam", lr = 0.001)
losshistory, train_state = model.train(epochs = 5000)
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
$${\partial^4 u \over \partial x^4} + 1 = 0, \qquad \text{where} \quad x \in [0,1]$$
$$u(0) = 0, \quad u'(0) = 0$$
$$u''(1) = 0, \quad u'''(1) = 0$$
$$u(x) = -{1 \over 24}x^4 + {1 \over 6}x^3 - {1 \over 4}x^2$$
def ddy(x, y):
    """Second derivative d^2 y / dx^2 via deepxde autodiff."""
    curvature = dde.grad.hessian(y, x)
    return curvature
def dddy(x, y):
    """Third derivative: first derivative of the second derivative."""
    second = ddy(x, y)
    return dde.grad.jacobian(second, x)
def pde(x, y):
    """Euler-Bernoulli beam residual: u'''' + 1 = 0 (zero at the solution)."""
    # Fourth derivative as the hessian of the second derivative.
    fourth = dde.grad.hessian(ddy(x, y), x)
    return fourth + 1
def boundary_l(x, on_boundary):
    """True for boundary points at the clamped left end, x = 0."""
    at_left_end = np.isclose(x[0], 0)
    return on_boundary and at_left_end
def boundary_r(x, on_boundary):
    """True for boundary points at the free right end, x = 1."""
    at_right_end = np.isclose(x[0], 1)
    return on_boundary and at_right_end
# Beam on x in [0, 1]: clamped at x = 0, free (moment- and shear-free) at x = 1.
geom = dde.geometry.Interval(0, 1)
bc1 = dde.DirichletBC(geom, lambda x: 0, boundary_l) # u(0) = 0
bc2 = dde.NeumannBC(geom, lambda x: 0, boundary_l) # u'(0) = 0
bc3 = dde.OperatorBC(geom, lambda x, y, _: ddy(x, y), boundary_r) # u''(1) = 0
bc4 = dde.OperatorBC(geom, lambda x, y, _: dddy(x, y), boundary_r) # u'''(1) = 0
# Reference solution to compute the error
def func(x):
    """Exact beam deflection u(x) = -x^4/24 + x^3/6 - x^2/4 (Horner form)."""
    return ((-x / 24 + 1 / 6) * x - 1 / 4) * x * x
data = dde.data.PDE(geom,
                    pde,
                    [bc1, bc2, bc3, bc4],
                    num_domain = 10,    # interior collocation points
                    num_boundary = 2,   # boundary points (the two ends)
                    solution = func,    # reference solution for error metrics
                    num_test = 100)
print('Total training points: ', data.train_x_all.shape)
print('Training initial points: ', data.train_x_bc.shape)
plt.plot(data.train_x_all, np.zeros([data.train_x_all.shape[0], 1]), '.')
# NOTE(review): the axis here is the spatial coordinate x of the beam, not
# time — the label text below is kept unchanged from the original.
plt.xlabel('Time (t)')
plt.show()
# Fully-connected net: 1 input (x) -> 3 hidden layers of 20 -> 1 output (u).
layer_size = [1] + [20] * 3 + [1]
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.FNN(layer_size, activation, initializer)
model = dde.Model(data, net)
model.compile("adam", lr = 0.001, metrics = ["l2 relative error"])
losshistory, train_state = model.train(epochs = 10000)
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
# !pip install deepxde
# Imports and backend selection for the channel-flow (Navier-Stokes) example.
import tensorflow as tf
import deepxde as dde
import numpy as np
import matplotlib.pyplot as plt
from deepxde.backend.set_default_backend import set_default_backend
set_default_backend("tensorflow")
$$\rho = 1\operatorname{kg/m^3}, \quad \mu = 1\operatorname{N\cdot s/m^2}, \quad D = 2\operatorname{m}, \quad
L = 1\operatorname{m}, \quad u_{in} = 1\operatorname{m/s}, \quad t \in [0,1]$$
$$\quad D_h = {4A \over p} = \lim\limits_{b\to\infty} {4(2bh) \over {2b+4h}} = 4h = 2\operatorname{m}$$
$$
\begin{align*}
u{\partial u \over \partial x} + v{\partial u \over \partial y} + {1 \over \rho}{\partial p \over \partial x} - \nu \ \left({\partial^2 u \over {\partial x^2}} + {\partial^2 u \over {\partial y^2}}\right) &= 0\\\\
u{\partial v \over \partial x} + v{\partial v \over \partial y} + {1 \over \rho}{\partial p \over \partial y} - \nu \ \left({\partial^2 v \over {\partial x^2}} + {\partial^2 v \over {\partial y^2}}\right) &= 0\\\\
{\partial u \over \partial x} + {\partial v \over \partial y} &= 0
\end{align*}
$$
# Properties
rho = 1    # density
mu = 1     # dynamic viscosity
u_in = 1   # uniform inlet velocity
# NOTE(review): the markdown above states D = 2 m and L = 1 m, but the code
# uses D = 1 (channel height) and L = 2 (channel length) — the geometry,
# plots, and analytical profile below are all consistent with the code
# values; the prose appears to have them swapped. Verify against the source.
D = 1      # channel height
Dh = 2     # hydraulic diameter of a parallel-plate channel (4h)
L = 2      # channel length
Re = rho * Dh * u_in/mu  # Reynolds number
def boundary_tube(x, on_boundary):
    """True on the top/bottom channel walls, y = +/- D/2 (no-slip)."""
    on_wall = np.logical_or(np.isclose(x[1], -D/2), np.isclose(x[1], D/2))
    return np.logical_and(on_wall, on_boundary)
def boundary_flow(x, on_boundary):
    """True on the inlet face, x = -L/2."""
    at_inlet = np.isclose(x[0], -L/2)
    return on_boundary and at_inlet
def boundary_end(x, on_boundary):
    """True on the outlet face, x = L/2."""
    at_outlet = np.isclose(x[0], L/2)
    return on_boundary and at_outlet
def pde(x, y):
    # Steady incompressible 2-D Navier-Stokes residuals.
    # x: (N, 2) coordinates (x, y); y: (N, 3) network outputs (u, v, p).
    # jacobian: i selects the output component, j the input coordinate.
    du_x = dde.grad.jacobian(y, x, i = 0, j = 0)   # du/dx
    du_y = dde.grad.jacobian(y, x, i = 0, j = 1)   # du/dy
    dv_x = dde.grad.jacobian(y, x, i = 1, j = 0)   # dv/dx
    dv_y = dde.grad.jacobian(y, x, i = 1, j = 1)   # dv/dy
    dp_x = dde.grad.jacobian(y, x, i = 2, j = 0)   # dp/dx
    dp_y = dde.grad.jacobian(y, x, i = 2, j = 1)   # dp/dy
    # hessian: component selects the output; i, j the two input coordinates.
    du_xx = dde.grad.hessian(y, x, i = 0, j = 0, component = 0)   # d2u/dx2
    du_yy = dde.grad.hessian(y, x, i = 1, j = 1, component = 0)   # d2u/dy2
    dv_xx = dde.grad.hessian(y, x, i = 0, j = 0, component = 1)   # d2v/dx2
    dv_yy = dde.grad.hessian(y, x, i = 1, j = 1, component = 1)   # d2v/dy2
    # x- and y-momentum residuals (kinematic viscosity nu = mu/rho) and
    # the continuity (incompressibility) residual.
    pde_u = y[:,0:1]*du_x + y[:,1:2]*du_y + 1/rho * dp_x - (mu/rho)*(du_xx + du_yy)
    pde_v = y[:,0:1]*dv_x + y[:,1:2]*dv_y + 1/rho * dp_y - (mu/rho)*(dv_xx + dv_yy)
    pde_cont = du_x + dv_y
    return [pde_u, pde_v, pde_cont]
# Rectangular channel [-L/2, L/2] x [-D/2, D/2]. The extra vertex [L/2, 0]
# splits the outlet edge in two — presumably to guarantee sampled boundary
# points on the outlet; TODO confirm.
geom = dde.geometry.Polygon([[-L/2, -D/2], [-L/2, D/2], [L/2, D/2], [L/2, 0], [L/2, -D/2]])
# No-slip walls (u = v = 0), uniform inlet (u = u_in, v = 0),
# and outlet reference conditions (p = 0, v = 0). component: 0=u, 1=v, 2=p.
bc_tube_u = dde.DirichletBC(geom, lambda x: 0., boundary_tube, component = 0)
bc_tube_v = dde.DirichletBC(geom, lambda x: 0., boundary_tube, component = 1)
bc_flow_u = dde.DirichletBC(geom, lambda x: u_in, boundary_flow, component = 0)
bc_flow_v = dde.DirichletBC(geom, lambda x: 0., boundary_flow, component = 1)
bc_end_p = dde.DirichletBC(geom, lambda x: 0., boundary_end, component = 2)
bc_end_v = dde.DirichletBC(geom, lambda x: 0., boundary_end, component = 1)
data = dde.data.PDE(geom,
                    pde,
                    [bc_tube_u, bc_tube_v, bc_flow_u, bc_flow_v, bc_end_v, bc_end_p],
                    num_domain = 2000,
                    num_boundary = 200,
                    num_test = 100)
# Show where the collocation points were sampled.
plt.scatter(data.train_x_all[:,0], data.train_x_all[:,1], s=0.5)
plt.xlabel('x-direction length')
plt.ylabel('Distance from the middle of plates (m)')
plt.show()
# Fully-connected net: 2 inputs (x, y) -> 5 hidden layers of 64 -> 3 outputs (u, v, p).
layer_size = [2] + [64] * 5 + [3]
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.FNN(layer_size, activation, initializer)
model = dde.Model(data, net)
model.compile("adam", lr = 1e-3)
losshistory, train_state = model.train(epochs = 10000, display_every = 1000)
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
# Dense scatter plots of the three predicted fields (u, v, p) over the domain.
samples = geom.random_points(500000)
result = model.net(samples)
color_legend = [[0, 1.5], [-0.3, 0.3], [0, 35]]  # color ranges for u, v, p
for idx in range(3):
    plt.figure(figsize = (20, 4))
    plt.scatter(samples[:, 0],
                samples[:, 1],
                c = result[:, idx],
                cmap = 'jet',
                s = 2)
    plt.colorbar()
    plt.clim(color_legend[idx])
    plt.xlim((0-L/2, L-L/2))
    plt.ylim((0-D/2, D-D/2))
    plt.tight_layout()
    plt.show()
# Refine the Adam solution with L-BFGS, then re-plot the three fields.
dde.optimizers.config.set_LBFGS_options(maxiter = 10000)
model.compile("L-BFGS")
losshistory, train_state = model.train()
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
samples = geom.random_points(500000)
result = model.net(samples)
color_legend = [[0, 1.5], [-0.3, 0.3], [0, 35]]  # color ranges for u, v, p
for idx in range(3):
    plt.figure(figsize = (20, 4))
    plt.scatter(samples[:, 0],
                samples[:, 1],
                c = result[:, idx],
                cmap = 'jet',
                s = 2)
    plt.colorbar()
    plt.clim(color_legend[idx])
    plt.xlim((0-L/2, L-L/2))
    plt.ylim((0-D/2, D-D/2))
    plt.tight_layout()
    plt.show()
$$u(y) = {3V_{avg} \over 2} \left[ 1- \left( \frac{y}{h} \right)^2 \right]$$
# Analytical solution
# Fully-developed Poiseuille profile u(y) = 1.5 * V_avg * (1 - (y/h)^2),
# sampled along the vertical line x = 1 for comparison with the PINN.
x = np.ones([1000,1])
y = np.linspace(-0.5, 0.5, 1000).reshape(1000,1)
end = np.hstack([x, y])
gt_flow = u_in * 1.5 * (1 - ((y)/(D/2))**2)
result = model.net(end)
# CFD solution
import pandas as pd
# Tab-separated velocity profile exported from a CFD run — presumably
# sampled at x = 1 (per the filename); verify against the data source.
vel_plot = pd.read_csv('./data_files/vel_plot_x1.txt',sep = '\t', header = None)
# Overlay PINN, analytical, and CFD u-velocity profiles across the channel.
for idx in range(1):
    plt.figure(figsize = (9, 6))
    plt.plot(y, result[:, idx], c = 'tomato', linewidth = 3, label = 'PINN solution')
    plt.plot(y, gt_flow, c = 'k', linestyle = '--', label = 'Analytical solution')
    plt.plot(np.linspace(-0.5,0.5,502), vel_plot[1], c = 'green', linewidth = 3, label = 'CFD')
    plt.xticks(np.linspace(-0.5, 0.5, 5), fontsize = 15)
    plt.yticks(np.linspace(0, 1.5, 11), fontsize = 15)
    plt.tight_layout()
    plt.legend(fontsize = 12)
    plt.xlabel('Distance from the middle of plates (m)', fontsize = 15)
    plt.ylabel('Velocity ($u$)', fontsize = 15)
    plt.show()
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')